

<!DOCTYPE html>


<html lang="en" data-content_root="" >

  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />

    <title>Post-Training Quantization with Qronos &#8212; Brevitas Documentation - dev</title>
  
  
  
  <script data-cfasync="false">
    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
  </script>
  
  <!-- Loaded before other Sphinx assets -->
  <link href="../_static/styles/theme.css?digest=3ee479438cf8b5e0d341" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=3ee479438cf8b5e0d341" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=3ee479438cf8b5e0d341" rel="stylesheet" />

  
  <link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=3ee479438cf8b5e0d341" rel="stylesheet" />
  <link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />

    <link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
    <link rel="stylesheet" type="text/css" href="../_static/sg_gallery.css" />
  
  <!-- Pre-loaded scripts that we'll load fully later -->
  <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=3ee479438cf8b5e0d341" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=3ee479438cf8b5e0d341" />
  <script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=3ee479438cf8b5e0d341"></script>

    <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
    <script src="../_static/jquery.js"></script>
    <script src="../_static/underscore.js"></script>
    <script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
    <script src="../_static/doctools.js"></script>
    <script src="../_static/sphinx_highlight.js"></script>
    <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
    <script>window.MathJax = {"tex": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true}, "options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document", "processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
    <script>DOCUMENTATION_OPTIONS.pagename = 'papers/qronos';</script>
    <script>
        DOCUMENTATION_OPTIONS.theme_version = '0.15.3';
        DOCUMENTATION_OPTIONS.theme_switcher_json_url = 'https://xilinx.github.io/brevitas/dev/_static/versions.json';
        DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev';
        DOCUMENTATION_OPTIONS.show_version_warning_banner = false;
        </script>
    <link rel="author" title="About these documents" href="../about.html" />
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="User Guides" href="../user_guide/index.html" />
    <link rel="prev" title="Papers" href="index.html" />
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <meta name="docsearch:language" content="en"/>
  </head>
  
  
  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">

  
  
  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
  
  <div id="pst-scroll-pixel-helper"></div>
  
  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
    <i class="fa-solid fa-arrow-up"></i>Back to top</button>

  
  <input type="checkbox"
          class="sidebar-toggle"
          id="pst-primary-sidebar-checkbox"/>
  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
  
  <input type="checkbox"
          class="sidebar-toggle"
          id="pst-secondary-sidebar-checkbox"/>
  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
  
  <div class="search-button__wrapper">
    <div class="search-button__overlay"></div>
    <div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
      action="../search.html"
      method="get">
  <i class="fa-solid fa-magnifying-glass"></i>
  <input type="search"
         class="form-control"
         name="q"
         id="search-input"
         placeholder="Search the docs ..."
         aria-label="Search the docs ..."
         autocomplete="off"
         autocorrect="off"
         autocapitalize="off"
         spellcheck="false"/>
  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
  </div>

  <div class="pst-async-banner-revealer d-none">
  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>

  
    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
  <button class="sidebar-toggle primary-toggle" aria-label="Site navigation">
    <span class="fa-solid fa-bars"></span>
  </button>
  
  
  <div class="col-lg-3 navbar-header-items__start">
    
      <div class="navbar-item">

  

<a class="navbar-brand logo" href="../index.html">
  
  
  
  
  
    
    
      
    
    
    <img src="../_static/brevitas_logo_black.svg" class="logo__image only-light" alt="Brevitas Documentation - dev - Home"/>
    <script>document.write(`<img src="../_static/brevitas_logo_white.svg" class="logo__image only-dark" alt="Brevitas Documentation - dev - Home"/>`);</script>
  
  
</a></div>
    
  </div>
  
  <div class="col-lg-9 navbar-header-items">
    
    <div class="me-auto navbar-header-items__center">
      
        <div class="navbar-item">
<nav class="navbar-nav">
  <ul class="bd-navbar-elements navbar-nav">
    
<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../setup.html">
    Setup
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../getting_started.html">
    Getting Started
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../tutorials/index.html">
    Tutorials
  </a>
</li>


<li class="nav-item pst-header-nav-item current active">
  <a class="nav-link nav-internal" href="index.html">
    Papers
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../user_guide/index.html">
    User Guides
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../settings.html">
    Settings
  </a>
</li>

            <li class="nav-item dropdown pst-header-nav-item">
                <button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
                    More
                </button>
                <ul id="pst-nav-more-links" class="dropdown-menu">
                    
<li class="nav-item ">
  <a class="nav-link dropdown-item nav-internal" href="../faq.html">
    FAQ
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link dropdown-item nav-internal" href="../api_reference/index.html">
    API reference
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link dropdown-item nav-internal" href="../about.html">
    About
  </a>
</li>

                </ul>
            </li>
            
  </ul>
</nav></div>
      
    </div>
    
    
    <div class="navbar-header-items__end">
      
        <div class="navbar-item navbar-persistent--container">
          

 <script>
 document.write(`
   <button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass"></i>
    <span class="search-button__default-text">Search</span>
    <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
   </button>
 `);
 </script>
        </div>
      
      
        <div class="navbar-item">

<script>
document.write(`
  <button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
    <span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
    <span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
  </button>
`);
</script></div>
      
    </div>
    
  </div>
  
  
    <div class="navbar-persistent--mobile">

 <script>
 document.write(`
   <button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass"></i>
    <span class="search-button__default-text">Search</span>
    <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
   </button>
 `);
 </script>
    </div>
  

  
    <button class="sidebar-toggle secondary-toggle" aria-label="On this page">
      <span class="fa-solid fa-outdent"></span>
    </button>
  
</div>

    </header>
  

  <div class="bd-container">
    <div class="bd-container__inner bd-page-width">
      
      
      
      <div class="bd-sidebar-primary bd-sidebar">
        

  
  <div class="sidebar-header-items sidebar-primary__section">
    
    
      <div class="sidebar-header-items__center">
        
          
          
            <div class="navbar-item">
<nav class="navbar-nav">
  <ul class="bd-navbar-elements navbar-nav">
    
<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../setup.html">
    Setup
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../getting_started.html">
    Getting Started
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../tutorials/index.html">
    Tutorials
  </a>
</li>


<li class="nav-item pst-header-nav-item current active">
  <a class="nav-link nav-internal" href="index.html">
    Papers
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../user_guide/index.html">
    User Guides
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../settings.html">
    Settings
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../faq.html">
    FAQ
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../api_reference/index.html">
    API reference
  </a>
</li>


<li class="nav-item pst-header-nav-item">
  <a class="nav-link nav-internal" href="../about.html">
    About
  </a>
</li>

  </ul>
</nav></div>
          
        
      </div>
    
    
    
      <div class="sidebar-header-items__end">
        
          <div class="navbar-item">

<script>
document.write(`
  <button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
    <span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
    <span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
  </button>
`);
</script></div>
        
      </div>
    
  </div>
  
    <div class="sidebar-primary-items__start sidebar-primary__section">
        <div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
     aria-label="Section Navigation">
  <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
  <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Qronos</a></li>
</ul>
</div>
</nav></div>
    </div>
  
  
  <div class="sidebar-primary-items__end sidebar-primary__section">
  </div>
  
  <div id="rtd-footer-container"></div>


      </div>
      
      <main id="main-content" class="bd-main" role="main">
        
        
          <div class="bd-content">
            <div class="bd-article-container">
              
              <div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
  
    <div class="header-article-items__start">
      
        <div class="header-article-item">



<nav aria-label="Breadcrumb" class="d-print-none">
  <ul class="bd-breadcrumbs">
    
    <li class="breadcrumb-item breadcrumb-home">
      <a href="../index.html" class="nav-link" aria-label="Home">
        <i class="fa-solid fa-home"></i>
      </a>
    </li>
    
    <li class="breadcrumb-item"><a href="index.html" class="nav-link">Papers</a></li>
    
    <li class="breadcrumb-item active" aria-current="page">Post-Trainin...</li>
  </ul>
</nav>
</div>
      
    </div>
  
  
</div>
</div>
              
              
              
                
<div id="searchbox"></div>
                <article class="bd-article">
                  
  <section id="post-training-quantization-with-qronos">
<h1>Post-Training Quantization with Qronos<a class="headerlink" href="#post-training-quantization-with-qronos" title="Permalink to this heading">#</a></h1>
<p>Qronos is a new post-training quantization (PTQ) algorithm that sequentially rounds
and updates neural network weights to explicitly address quantization errors that
have been introduced in both the weights and activations of previous layers. At each
iteration, Qronos first selects the quantized weight that optimally corrects the current
approximation error while holding the remaining weights fixed. It then updates the future
(yet-to-be quantized) weights to optimally compensate for the rounding error. Let’s dive
into the Qronos algorithm and how to use it with Brevitas!</p>
<div align="center">
       <a href="https://arxiv.org/pdf/2505.11695">📄 Paper</a>&nbsp;
            <a href="https://github.com/Xilinx/brevitas/blob/dev/src/brevitas/graph/qronos.py">💻 Code</a>
</div><nav class="contents local" id="table-of-contents">
<p class="topic-title">Table of Contents</p>
<ul class="simple">
<li><p><a class="reference internal" href="#about-the-algorithm" id="id3">About the Algorithm</a></p></li>
<li><p><a class="reference internal" href="#getting-started" id="id4">Getting Started</a></p></li>
<li><p><a class="reference internal" href="#how-to-use-few-bit-llm-quantization" id="id5">How to Use: Few-Bit LLM Quantization</a></p>
<ul>
<li><p><a class="reference internal" href="#weight-only-quantization" id="id6">Weight-only quantization</a></p>
<ul>
<li><p><a class="reference internal" href="#and-4-bit-weights" id="id7">3 and 4 bit weights</a></p></li>
<li><p><a class="reference internal" href="#or-1-58-bit-weights" id="id8">2 or 1.58 bit weights</a></p></li>
</ul>
</li>
<li><p><a class="reference internal" href="#weight-activation-quantization" id="id9">Weight-activation quantization</a></p>
<ul>
<li><p><a class="reference internal" href="#quarot-with-int4-and-mxfp4" id="id10">QuaRot with INT4 and MXFP4</a></p></li>
<li><p><a class="reference internal" href="#spinquant-with-int4-and-mxfp4" id="id11">SpinQuant with INT4 and MXFP4</a></p></li>
</ul>
</li>
<li><p><a class="reference internal" href="#gguf-q4-0-model-export-for-llama-cpp" id="id12">GGUF:Q4_0 model export for llama.cpp</a></p></li>
</ul>
</li>
<li><p><a class="reference internal" href="#citation" id="id13">Citation</a></p></li>
<li><p><a class="reference internal" href="#references" id="id14">References</a></p></li>
</ul>
</nav>
<section id="about-the-algorithm">
<h2><a class="toc-backref" href="#id3" role="doc-backlink">About the Algorithm</a><a class="headerlink" href="#about-the-algorithm" title="Permalink to this heading">#</a></h2>
<p>PTQ techniques typically aim to solve the layerwise reconstruction problem given by</p>
<div class="math notranslate nohighlight">
\[\operatorname{argmin}_Q \Vert X^T W - X^T Q \Vert\]</div>
<p>where, for inputs <span class="math notranslate nohighlight">\(X\)</span> and weights <span class="math notranslate nohighlight">\(W\)</span>, the goal is to find the quantized
weights <span class="math notranslate nohighlight">\(Q\)</span> that minimize the impact of quantization error on the behavior of the
model.</p>
<p>However, this formulation has no awareness of errors resulting from previously quantized
layers and/or activation quantization; therefore, algorithms designed to solve the standard
reconstruction problem (e.g., GPTQ, more recently known as OPTQ [1]) cannot explicitly
correct for these sources of error.</p>
<p>Qronos considers the “mismatched” reconstruction problem (initially formulated and analyzed
by Lybrand and Saab [2]), which explicitly addresses these questions via</p>
<div class="math notranslate nohighlight">
\[\operatorname{argmin}_Q \Vert X^T W - \tilde{X}^T Q \Vert\]</div>
<p>where <span class="math notranslate nohighlight">\(\tilde{X}\)</span> denotes the (potentially quantized) inputs from the previously quantized layers.</p>
<p>To solve this problem, Qronos quantizes weights one-by-one by alternating between two steps:
(1) error correction, where a quantized weight is selected to optimally correct the error; and
(2) error diffusion, where the remaining unquantized weights are updated to compensate for the
accumulated rounding error. To do so efficiently, Qronos benefits from the same techniques used
to scale GPTQ to increasingly large models (e.g., Cholesky decomposition and lazy batch
updates), and consistently produces quantized models with better accuracy!</p>
<p>🔍 Check out the <a class="reference external" href="https://arxiv.org/pdf/2505.11695">paper</a> for formalized objective
functions, derivations, and analyses!</p>
</section>
<section id="getting-started">
<h2><a class="toc-backref" href="#id4" role="doc-backlink">Getting Started</a><a class="headerlink" href="#getting-started" title="Permalink to this heading">#</a></h2>
<p>Below are the versions used for these results; different versions may yield different results.</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">python==3.12</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">torch==2.4.0+rocm6.1</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">datasets==3.2.0</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">optimum==1.24.0</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">accelerate==1.3.0</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">transformers==4.51.3</span></code> (custom fork, see below)</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">fast_hadamard_transform==1.0.4</span></code> (custom fork, see below)</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">lighteval==0.6.0</span></code> (custom fork, see below)</p></li>
</ul>
<p>You can install PyTorch for ROCm 6.1 via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span><span class="nv">torch</span><span class="o">==</span><span class="m">2</span>.4.0<span class="w"> </span>torchvision<span class="w"> </span>torchaudio<span class="w"> </span>--index-url<span class="w"> </span>https://download.pytorch.org/whl/rocm6.1
</pre></div>
</div>
<p>You can install and build a fork of the <code class="docutils literal notranslate"><span class="pre">fast_hadamard_transform</span></code> library with ROCm support
via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/jeffdaily/fast-hadamard-transform<span class="w"> </span>-b<span class="w"> </span>rocm
<span class="nb">cd</span><span class="w"> </span>fast-hadamard-transform
pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span>.
</pre></div>
</div>
<p>There is a known issue with <code class="docutils literal notranslate"><span class="pre">lighteval</span></code> v0.6.0 (see <a class="reference external" href="https://github.com/huggingface/lighteval/issues/489">#489</a>).
To collect zero-shot results, we use the patched fork:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/Giuseppe5/lighteval
<span class="nb">cd</span><span class="w"> </span>lighteval
pip<span class="w"> </span>install<span class="w"> </span>.
</pre></div>
</div>
<p>There is also a known issue with <code class="docutils literal notranslate"><span class="pre">transformers==4.51.3</span></code> when used with
<code class="docutils literal notranslate"><span class="pre">torch==2.4</span></code> (see <a class="reference external" href="https://github.com/huggingface/transformers/issues/38271">#38271</a>),
which only impacts QuaRot and SpinQuant. You can install a patched fork via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/i-colbert/transformers<span class="w"> </span>-b<span class="w"> </span>v4.51.3-patch
<span class="nb">cd</span><span class="w"> </span>transformers
pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span>.
</pre></div>
</div>
<p>Note that you may be able to avoid this issue with later versions of <code class="docutils literal notranslate"><span class="pre">torch</span></code>, but your
results may differ from those reported here.</p>
</section>
<section id="how-to-use-few-bit-llm-quantization">
<h2><a class="toc-backref" href="#id5" role="doc-backlink">How to Use: Few-Bit LLM Quantization</a><a class="headerlink" href="#how-to-use-few-bit-llm-quantization" title="Permalink to this heading">#</a></h2>
<p>With Brevitas, you can apply the Qronos algorithm to quantize HuggingFace models via
<a class="reference external" href="https://github.com/Xilinx/brevitas/tree/dev/src/brevitas_examples/llm">our LLM entry point</a>!</p>
<p>We provide packaged config files in <a class="reference external" href="https://github.com/Xilinx/brevitas/tree/dev/src/brevitas_examples/papers/qronos">brevitas_examples/papers/qronos</a> to enable
similar experiments described in the paper. The provided configurations specify Llama-3.2-1B,
but you can specify different HuggingFace models in the CLI args. For example:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-w4-base.yml<span class="w"> </span>--model<span class="o">=</span>meta-llama/Llama-3.2-3B-Instruct
</pre></div>
</div>
<p>The BF16 baselines give a WikiText2 perplexity of 8.94 and an average normalized 0-shot
accuracy (reported as “all_acc_norm” in LightEval) of 59.40% via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-w4-base.yml<span class="w"> </span>--no-quantize
</pre></div>
</div>
<p>🧪 Next, we will share our results for weight-only quantization and weight-activation quantization
for Llama-3.2-1B. We encourage you to try more models and formats, and share your results!</p>
<section id="weight-only-quantization">
<h3><a class="toc-backref" href="#id6" role="doc-backlink">Weight-only quantization</a><a class="headerlink" href="#weight-only-quantization" title="Permalink to this heading">#</a></h3>
<p>Weight-only quantization compresses neural networks by quantizing just the weights (e.g.,
INT4), while keeping activations in full precision (e.g., BF16). It reduces model size and
memory usage, often with minimal impact on accuracy if one is intentional with calibration.
Here, we will demonstrate how you can use Qronos to calibrate weights quantized to 4 or fewer
bits.</p>
<section id="and-4-bit-weights">
<h4><a class="toc-backref" href="#id7" role="doc-backlink">3 and 4 bit weights</a><a class="headerlink" href="#and-4-bit-weights" title="Permalink to this heading">#</a></h4>
<p>Below, we summarize the results when quantizing only the weights of Llama-3.2-1B to 3 or 4
bits. We compare Qronos to GPTQ and GPFQ. We provide round-to-nearest (RTN) as a baseline,
where weights are directly cast to the data format and no calibration is applied.</p>
<div class="pst-scrollable-table-container"><table class="table">
<tbody>
<tr class="row-odd"><td></td>
<td colspan="2"><p>3-bit</p></td>
<td colspan="2"><p>4-bit</p></td>
</tr>
<tr class="row-even"><td></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
</tr>
<tr class="row-odd"><td><p>RTN</p></td>
<td><p>2e4</p></td>
<td><p>32.24</p></td>
<td><p>18.00</p></td>
<td><p>48.95</p></td>
</tr>
<tr class="row-even"><td><p>GPTQ</p></td>
<td><p>40.50</p></td>
<td><p>38.15</p></td>
<td><p>10.44</p></td>
<td><p>55.39</p></td>
</tr>
<tr class="row-odd"><td><p>GPFQ</p></td>
<td><p>40.50</p></td>
<td><p>37.34</p></td>
<td><p>10.56</p></td>
<td><p>54.88</p></td>
</tr>
<tr class="row-even"><td><p>Qronos</p></td>
<td><p><strong>22.00</strong></p></td>
<td><p><strong>40.32</strong></p></td>
<td><p><strong>10.12</strong></p></td>
<td><p><strong>55.87</strong></p></td>
</tr>
</tbody>
</table>
</div>
<p>You can collect 4-bit weight-only results with the <code class="docutils literal notranslate"><span class="pre">llama3-w4-base.yml</span></code> config via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-w4-base.yml<span class="w"> </span>--qronos
</pre></div>
</div>
<p>You can instead specify GPTQ or GPFQ by using <code class="docutils literal notranslate"><span class="pre">--gptq</span></code> or <code class="docutils literal notranslate"><span class="pre">--gpfq</span></code>; these algorithms are
mutually exclusive. You can also specify a different bit width in the CLI args.
For example:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-w4-base.yml<span class="w"> </span>--weight-bit-width<span class="o">=</span><span class="m">3</span><span class="w"> </span>--qronos
</pre></div>
</div>
<p>However, we recommend the following config when quantizing to 2 bits or fewer.</p>
</section>
<section id="or-1-58-bit-weights">
<h4><a class="toc-backref" href="#id8" role="doc-backlink">2 or 1.58 bit weights</a><a class="headerlink" href="#or-1-58-bit-weights" title="Permalink to this heading">#</a></h4>
<p>Quantizing to 2 bits or fewer with minimal degradation requires an intentional effort to reduce
quantization error that arises from different sources. Indeed, the latest innovations in PTQ
are skewed towards proposing or improving transformations that make weights and/or activations
more amenable to quantization by limiting the impact of outliers, which is another source of
quantization error. With Brevitas, you can compose one or more of these transformations with
Qronos to jointly reduce the impact of outliers while correcting quantization error in both weights
and activations.</p>
<p>The following table summarizes the results of weight-only quantization on Llama-3.2-1B
when jointly using Hadamard-based incoherence processing (HIP) [3] and weight magnitude
reduction (MagR) [4] as our quantization transform. We then compare adaptive rounding functions
when quantizing the model to 1.58-bit (i.e., ternary) or 2-bit weights.</p>
<div class="pst-scrollable-table-container"><table class="table">
<tbody>
<tr class="row-odd"><td></td>
<td colspan="2"><p>1.58-bit</p></td>
<td colspan="2"><p>2-bit</p></td>
</tr>
<tr class="row-even"><td></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
</tr>
<tr class="row-odd"><td><p>RTN</p></td>
<td><p>2e5</p></td>
<td><p>32.78</p></td>
<td><p>3e3</p></td>
<td><p>32.22</p></td>
</tr>
<tr class="row-even"><td><p>OPTQ</p></td>
<td><p>3e2</p></td>
<td><p>33.09</p></td>
<td><p>25.00</p></td>
<td><p>38.96</p></td>
</tr>
<tr class="row-odd"><td><p>GPFQ</p></td>
<td><p>1e2</p></td>
<td><p>33.21</p></td>
<td><p>26.25</p></td>
<td><p>38.73</p></td>
</tr>
<tr class="row-even"><td><p>Qronos</p></td>
<td><p><strong>39.25</strong></p></td>
<td><p><strong>34.11</strong></p></td>
<td><p><strong>18.00</strong></p></td>
<td><p><strong>42.42</strong></p></td>
</tr>
</tbody>
</table>
</div>
<p>We provide <code class="docutils literal notranslate"><span class="pre">llama3-w2-hip-magr.yml</span></code> as an example, which you can run via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-w2-hip-magr.yml<span class="w"> </span>--weight-bit-width<span class="o">=</span><span class="m">2</span><span class="w"> </span>--qronos
</pre></div>
</div>
<p>and you can quantize to 1.58 bits via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-w2-hip-magr.yml<span class="w"> </span>--weight-bit-width<span class="o">=</span><span class="m">2</span><span class="w"> </span>--weight-narrow-range<span class="w"> </span>--qronos
</pre></div>
</div>
<p>where <code class="docutils literal notranslate"><span class="pre">--weight-bit-width=2</span> <span class="pre">--weight-narrow-range</span></code> restricts the
quantization alphabet to <span class="math notranslate nohighlight">\(\mathcal{A}=\{-1, 0, 1\}\)</span>.</p>
</section>
</section>
<section id="weight-activation-quantization">
<h3><a class="toc-backref" href="#id9" role="doc-backlink">Weight-activation quantization</a><a class="headerlink" href="#weight-activation-quantization" title="Permalink to this heading">#</a></h3>
<p>Weight-activation quantization constrains both weights and activations to low-precision formats
(e.g., INT4 or MXFP4), enabling low-precision computations. It also offers memory and compute
savings, but often requires more careful calibration to maintain accuracy.</p>
<section id="quarot-with-int4-and-mxfp4">
<h4><a class="toc-backref" href="#id10" role="doc-backlink">QuaRot with INT4 and MXFP4</a><a class="headerlink" href="#quarot-with-int4-and-mxfp4" title="Permalink to this heading">#</a></h4>
<p>QuaRot [3] is a rotation-based quantization method that applies Hadamard transformations to
neural network weights and activations to remove outliers before quantization, enabling
accurate low-bit quantization. With Brevitas, you can similarly apply and fuse Hadamard
rotations then apply Qronos (or other adaptive rounding algorithms). The following table
summarizes the results of quantizing the weights and activations of Llama-3.2-1B to INT4 or
MXFP4. We compare Qronos with GPTQ and GPFQ and provide RTN as a baseline.</p>
<div class="pst-scrollable-table-container"><table class="table">
<tbody>
<tr class="row-odd"><td></td>
<td colspan="2"><p>INT4</p></td>
<td colspan="2"><p>MXFP4</p></td>
</tr>
<tr class="row-even"><td></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
</tr>
<tr class="row-odd"><td><p>RTN</p></td>
<td><p>18.00</p></td>
<td><p>48.31</p></td>
<td><p>15.38</p></td>
<td><p>49.53</p></td>
</tr>
<tr class="row-even"><td><p>OPTQ</p></td>
<td><p>12.94</p></td>
<td><p>50.58</p></td>
<td><p>12.00</p></td>
<td><p>52.93</p></td>
</tr>
<tr class="row-odd"><td><p>GPFQ</p></td>
<td><p><strong>12.38</strong></p></td>
<td><p><strong>52.73</strong></p></td>
<td><p><strong>11.25</strong></p></td>
<td><p>53.45</p></td>
</tr>
<tr class="row-even"><td><p>Qronos</p></td>
<td><p><strong>12.38</strong></p></td>
<td><p>51.86</p></td>
<td><p><strong>11.25</strong></p></td>
<td><p><strong>53.71</strong></p></td>
</tr>
</tbody>
</table>
</div>
<p>To apply weight-activation quantization with Hadamard rotations similar to QuaRot [3], we
provide <code class="docutils literal notranslate"><span class="pre">llama3-w4a4-int-quarot.yml</span></code> and <code class="docutils literal notranslate"><span class="pre">llama3-w4a4-mxfp-quarot.yml</span></code>. For example:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-w4a4-int-quarot.yml<span class="w"> </span>--qronos
</pre></div>
</div>
<p>Again, using <code class="docutils literal notranslate"><span class="pre">--gptq</span></code> or <code class="docutils literal notranslate"><span class="pre">--gpfq</span></code> would instead run GPTQ or GPFQ.</p>
</section>
<section id="spinquant-with-int4-and-mxfp4">
<h4><a class="toc-backref" href="#id11" role="doc-backlink">SpinQuant with INT4 and MXFP4</a><a class="headerlink" href="#spinquant-with-int4-and-mxfp4" title="Permalink to this heading">#</a></h4>
<p>SpinQuant [5] is a more recent rotation-based quantization method that learns rotation matrices
based on Cayley optimization. With Brevitas, you can similarly learn and fuse these rotations,
then apply Qronos (or other adaptive rounding algorithms). The following table summarizes the
results of quantizing the weights and activations of Llama-3.2-1B to INT4 or MXFP4 using
Cayley-optimized rotations. We compare Qronos with GPTQ and GPFQ and provide RTN as a baseline.</p>
<div class="pst-scrollable-table-container"><table class="table">
<tbody>
<tr class="row-odd"><td></td>
<td colspan="2"><p>INT4</p></td>
<td colspan="2"><p>MXFP4</p></td>
</tr>
<tr class="row-even"><td></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
</tr>
<tr class="row-odd"><td><p>RTN</p></td>
<td><p>12.25</p></td>
<td><p>52.08</p></td>
<td><p>11.76</p></td>
<td><p>53.61</p></td>
</tr>
<tr class="row-even"><td><p>OPTQ</p></td>
<td><p>12.30</p></td>
<td><p>53.09</p></td>
<td><p>11.79</p></td>
<td><p>53.25</p></td>
</tr>
<tr class="row-odd"><td><p>GPFQ</p></td>
<td><p>12.28</p></td>
<td><p>52.85</p></td>
<td><p>11.35</p></td>
<td><p>53.22</p></td>
</tr>
<tr class="row-even"><td><p>Qronos</p></td>
<td><p><strong>11.52</strong></p></td>
<td><p><strong>54.00</strong></p></td>
<td><p><strong>10.80</strong></p></td>
<td><p><strong>54.83</strong></p></td>
</tr>
</tbody>
</table>
</div>
<p>Unlike the original SpinQuant proposal, which learns rotations after activation quantization
but before weight quantization, Brevitas learns rotations after quantizing both weights and
activations. Interestingly, only Qronos is able to improve both perplexity and 0-shot
performance over RTN.</p>
<p>To apply Cayley-optimized rotations similar to SpinQuant [5], we use
<code class="docutils literal notranslate"><span class="pre">llama3-w4a4-int-spinquant.yml</span></code> and <code class="docutils literal notranslate"><span class="pre">llama3-w4a4-mxfp-spinquant.yml</span></code>. These can be run, for
example, via:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>config/llama3-w4a4-int-spinquant.yml<span class="w"> </span>--qronos
</pre></div>
</div>
<p>Again, using <code class="docutils literal notranslate"><span class="pre">--gptq</span></code> or <code class="docutils literal notranslate"><span class="pre">--gpfq</span></code> would instead run GPTQ or GPFQ.</p>
</section>
</section>
<section id="gguf-q4-0-model-export-for-llama-cpp">
<h3><a class="toc-backref" href="#id12" role="doc-backlink">GGUF:Q4_0 model export for llama.cpp</a><a class="headerlink" href="#gguf-q4-0-model-export-for-llama-cpp" title="Permalink to this heading">#</a></h3>
<p>You can also export the quantized model to several GGUF formats for use with llama.cpp as
described in our <a class="reference external" href="https://xilinx.github.io/brevitas/dev/user_guide/export_gguf.html">GGUF export documentation</a>.</p>
<p>In this example, we export the quantized models to the GGUF:Q4_0 format:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>brevitas_ptq_llm<span class="w"> </span>--config<span class="o">=</span>llama3-gguf-q4_0.yml<span class="w"> </span>--qronos
</pre></div>
</div>
<p>Note that the file “Llama-3.2-1B-1.2B-Q4_0.gguf” will be created in the current directory.</p>
<p>The following table summarizes the results of weight-only quantization of Llama-3.2-1B to
the GGUF:Q4_0  format, comparing Qronos with GPTQ and GPFQ, where RTN is again provided as a
baseline.</p>
<div class="pst-scrollable-table-container"><table class="table">
<tbody>
<tr class="row-odd"><td></td>
<td><p>Wiki2 (↓)</p></td>
<td><p>0-shot (↑)</p></td>
</tr>
<tr class="row-even"><td><p>RTN</p></td>
<td><p>10.44</p></td>
<td><p>56.81</p></td>
</tr>
<tr class="row-odd"><td><p>OPTQ</p></td>
<td><p>9.50</p></td>
<td><p>57.96</p></td>
</tr>
<tr class="row-even"><td><p>GPFQ</p></td>
<td><p>9.50</p></td>
<td><p><strong>57.99</strong></p></td>
</tr>
<tr class="row-odd"><td><p>Qronos</p></td>
<td><p><strong>9.31</strong></p></td>
<td><p>57.88</p></td>
</tr>
</tbody>
</table>
</div>
</section>
</section>
<section id="citation">
<h2><a class="toc-backref" href="#id13" role="doc-backlink">Citation</a><a class="headerlink" href="#citation" title="Permalink to this heading">#</a></h2>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="nd">@article</span><span class="p">{</span><span class="n">zhang2025qronos</span><span class="p">,</span>
      <span class="n">title</span><span class="o">=</span><span class="p">{</span><span class="n">Qronos</span><span class="p">:</span> <span class="n">Correcting</span> <span class="n">the</span> <span class="n">Past</span> <span class="n">by</span> <span class="n">Shaping</span> <span class="n">the</span> <span class="n">Future</span><span class="o">...</span> <span class="ow">in</span> <span class="n">Post</span><span class="o">-</span><span class="n">Training</span> <span class="n">Quantization</span><span class="p">},</span>
      <span class="n">author</span><span class="o">=</span><span class="p">{</span><span class="n">Shihao</span> <span class="n">Zhang</span> <span class="ow">and</span> <span class="n">Haoyu</span> <span class="n">Zhang</span> <span class="ow">and</span> <span class="n">Ian</span> <span class="n">Colbert</span> <span class="ow">and</span> <span class="n">Rayan</span> <span class="n">Saab</span><span class="p">},</span>
      <span class="n">year</span><span class="o">=</span><span class="p">{</span><span class="mi">2025</span><span class="p">},</span>
      <span class="n">eprint</span><span class="o">=</span><span class="p">{</span><span class="mf">2505.11695</span><span class="p">},</span>
      <span class="n">archivePrefix</span><span class="o">=</span><span class="p">{</span><span class="n">arXiv</span><span class="p">},</span>
      <span class="n">primaryClass</span><span class="o">=</span><span class="p">{</span><span class="n">cs</span><span class="o">.</span><span class="n">LG</span><span class="p">},</span>
      <span class="n">url</span><span class="o">=</span><span class="p">{</span><span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">arxiv</span><span class="o">.</span><span class="n">org</span><span class="o">/</span><span class="nb">abs</span><span class="o">/</span><span class="mf">2505.11695</span><span class="p">},</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Note that this tutorial is not intended to reproduce all the experiments from the original
paper. To more accurately reproduce experiments from the paper, please see <a class="reference external" href="https://github.com/i-colbert/brevitas/tree/qronos/src/brevitas_examples/llm">this branch</a>.</p>
</section>
<section id="references">
<h2><a class="toc-backref" href="#id14" role="doc-backlink">References</a><a class="headerlink" href="#references" title="Permalink to this heading">#</a></h2>
<p>[1] Frantar, Elias, et al. “OPTQ: Accurate post-training quantization for generative pre-trained transformers.” 11th International Conference on Learning Representations. 2023.</p>
<p>[2] Lybrand, Eric, and Rayan Saab. “A greedy algorithm for quantizing neural networks.” Journal of Machine Learning Research 22.156 (2021): 1-38.</p>
<p>[3] Ashkboos, Saleh, et al. “QuaRot: Outlier-free 4-bit inference in rotated LLMs.” Advances in Neural Information Processing Systems 37 (2024): 100213-100240.</p>
<p>[4] Zhang, Aozhong, et al. “MagR: Weight magnitude reduction for enhancing post-training quantization.” arXiv preprint arXiv:2406.00800 (2024).</p>
<p>[5] Liu, Zechun, et al. “SpinQuant: LLM quantization with learned rotations.” arXiv preprint arXiv:2405.16406 (2024).</p>
</section>
</section>


                </article>
              
              
              
              
              
                <footer class="prev-next-footer d-print-none">
                  
<div class="prev-next-area">
    <a class="left-prev"
       href="index.html"
       title="previous page">
      <i class="fa-solid fa-angle-left"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title">Papers</p>
      </div>
    </a>
    <a class="right-next"
       href="../user_guide/index.html"
       title="next page">
      <div class="prev-next-info">
        <p class="prev-next-subtitle">next</p>
        <p class="prev-next-title">User Guides</p>
      </div>
      <i class="fa-solid fa-angle-right"></i>
    </a>
</div>
                </footer>
              
            </div>
            
            
              
                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">


  <div class="sidebar-secondary-item">
<div
    id="pst-page-navigation-heading-2"
    class="page-toc tocsection onthispage">
    <i class="fa-solid fa-list"></i> On this page
  </div>
  <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
    <ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#about-the-algorithm">About the Algorithm</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#getting-started">Getting Started</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#how-to-use-few-bit-llm-quantization">How to Use: Few-Bit LLM Quantization</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#weight-only-quantization">Weight-only quantization</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#and-4-bit-weights">3 and 4 bit weights</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#or-1-58-bit-weights">2 or 1.58 bit weights</a></li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#weight-activation-quantization">Weight-activation quantization</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#quarot-with-int4-and-mxfp4">QuaRot with INT4 and MXFP4</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#spinquant-with-int4-and-mxfp4">SpinQuant with INT4 and MXFP4</a></li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#gguf-q4-0-model-export-for-llama-cpp">GGUF:Q4_0 model export for llama.cpp</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#citation">Citation</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#references">References</a></li>
</ul>
  </nav></div>

  <div class="sidebar-secondary-item">

  <div class="tocsection sourcelink">
    <a href="../_sources/papers/qronos.rst.txt">
      <i class="fa-solid fa-file-lines"></i> Show Source
    </a>
  </div>
</div>

</div></div>
              
            
          </div>
          <footer class="bd-footer-content">
            
          </footer>
        
      </main>
    </div>
  </div>
  
  <!-- Scripts loaded after <body> so the DOM is not blocked -->
  <script src="../_static/scripts/bootstrap.js?digest=3ee479438cf8b5e0d341"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=3ee479438cf8b5e0d341"></script>

  <footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
  
    <div class="footer-items__start">
      
        <div class="footer-item">

  <p class="copyright">
    
      © Copyright 2025 - Advanced Micro Devices, Inc.
      <br/>
    
  </p>
</div>
      
        <div class="footer-item">

  <p class="sphinx-version">
    Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
    <br/>
  </p>
</div>
      
    </div>
  
  
  
    <div class="footer-items__end">
      
        <div class="footer-item">
<script>
document.write(`
  <div class="version-switcher__container dropdown">
    <button id="pst-version-switcher-button-2"
      type="button"
      class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
      data-bs-toggle="dropdown"
      aria-haspopup="listbox"
      aria-controls="pst-version-switcher-list-2"
      aria-label="Version switcher list"
    >
      Choose version  <!-- this text may get changed later by javascript -->
      <span class="caret"></span>
    </button>
    <div id="pst-version-switcher-list-2"
      class="version-switcher__menu dropdown-menu list-group-flush py-0"
      role="listbox" aria-labelledby="pst-version-switcher-button-2">
      <!-- dropdown will be populated by javascript on page load -->
    </div>
  </div>
`);
</script></div>
      
    </div>
  
</div>

  </footer>
  </body>
</html>